Doppelganger Python

2021, Nov 17

In this project, I demonstrate how to use OpenCV and Dlib to create a celebrity look-alike application.

The application works by converting a face into a high-dimensional vector of points and then learns its feature space. Or put simply, it clusters the facial vector points together and then compares the distance between two clusters to determine if they are similar to each other.

In order to keep the project simple, a smaller dataset was created in the celeb_mini folder and a NumPy array celeb_mapping.npy was used to link the image folders to names of the celebrities.

Below is an overview video and diagram of my thought process with all code available in my GitHub repository.

Github code repository

Overview diagram of the project

Image input and what is expected as output diagram
Overview

Python code

import os
import glob
import dlib
import cv2
import numpy as np
import matplotlib.pyplot as plt

# Set up the the faceDetector, shapePredictor and faceRecognizer to
# optimize speed as they are nedded by both functions.
faceDetector = dlib.get_frontal_face_detector()
shapePredictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")
faceRecognizer = dlib.face_recognition_model_v1(
    "dlib_face_recognition_resnet_model_v1.dat"
)


def inrole_data(faceDetector, shapePredictor, faceRecognizer):
    """This function creates a face descriptors of (1x128) for each face
    in the images and stores them in in a NumPy array. It also creates a dictionary
    to store the the index of array to names of the celebrity also in NumPy array.

    Args:
        faceDetector Dlib: used to detect faces in an image
        shapePredictor Dlib: identifies the locations of import facial landmarks
        faceRecognizer Dlib:  maps human faces into 128D vectors
    """

    # create a dictionary to uses as a index for each face descriptors to celebrity name.
    index = {}
    i = 0
    # create a NumPy array to store face descriptors
    faceDescriptors = None

    # loop though the images in folders
    for images in os.listdir("celeb_mini"):
        imagefiles = os.listdir(os.path.join("celeb_mini", images))
        for image in imagefiles:
            imagePath = os.path.join("celeb_mini", images, image)

            #  read each image and convert to a format form Dlib
            img = cv2.imread(imagePath)
            imDli = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            # look for faces in image
            faces = faceDetector(imDli)

            # Create descriptor and index for each image
            for face in faces:

                # Find facial landmarks for each detected face
                shape = shapePredictor(imDli, face)

                # Compute face descriptor using neural network defined in Dlib.
                faceDescriptor = faceRecognizer.compute_face_descriptor(img, shape)

                # Convert face descriptor from Dlib's format to list, then a NumPy array
                faceDescriptorList = [x for x in faceDescriptor]
                faceDescriptorNdarray = np.asarray(faceDescriptorList, dtype=np.float64)
                faceDescriptorNdarray = faceDescriptorNdarray[np.newaxis, :]

                # add face descriptors to the faceDescriptor Numpy array.
                if faceDescriptors is None:
                    faceDescriptors = faceDescriptorNdarray
                else:
                    faceDescriptors = np.concatenate(
                        (faceDescriptors, faceDescriptorNdarray), axis=0
                    )

                # map celebrity name corresponding to face descriptors and stored in NumPy Array
                index[i] = np.load("celeb_mapping.npy", allow_pickle=True).item()[
                    images
                ]
                i += 1
    # save
    np.save("index.npy", index)
    np.save("faceDescriptors.npy", faceDescriptors)


def lookalike(faceDetector, shapePredictor, faceRecognizer):
    """This function loads images from the images folder and compares
    them to prebuilt face descriptors to find the best match
    for a celebrity look alike.

    Args:
        faceDetector Dlib: used to detect faces in an image
        shapePredictor Dlib: identifies the locations of import facial landmarks
        faceRecognizer Dlib:  maps human faces into 128D vectors
    """
    # load face descriptors and the names index
    faceDescriptors = np.load("faceDescriptors.npy")
    index = np.load("index.npy", allow_pickle="TRUE").item()

    # read image
    testImages = glob.glob("images/*.jpg")

    # loop though the images folder
    for image in testImages:
        # read the image and convert to Dlib format
        im = cv2.imread(image)
        imDli = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)

        # detect faces
        faces = faceDetector(imDli)

        # create descriptor and index for each image
        for face in faces:
            shape = shapePredictor(imDli, face)

            faceDescriptor = faceRecognizer.compute_face_descriptor(im, shape)

            faceDescriptorList = [x for x in faceDescriptor]
            faceDescriptorNdarray = np.asarray(faceDescriptorList, dtype=np.float64)
            faceDescriptorNdarray = faceDescriptorNdarray[np.newaxis, :]

            # calculate the distances of the new face wiht face descriptors of celebrities
            distances = np.linalg.norm(faceDescriptors - faceDescriptorNdarray, axis=1)
            argmin = np.argmin(distances)
            minDistance = distances[argmin]

            # find an acceptable lookalike
            if minDistance <= 0.8:
                label = index[argmin]
            else:
                label = "unknown"

            celeb_name = label

            # load celebrity images from celeb_mini folder
            for images in os.listdir("celeb_mini"):
                imagefiles = os.listdir(os.path.join("celeb_mini", images))

                if (
                    np.load("celeb_mapping.npy", allow_pickle=True).item()[images]
                    == celeb_name
                ):
                    for image in imagefiles:
                        img_cele = cv2.imread(os.path.join("celeb_mini", images, image))
                        img_cele = cv2.cvtColor(img_cele, cv2.COLOR_BGR2RGB)
                        break

        # show images one at a time
        plt.subplot(121)
        plt.imshow(imDli)
        plt.title("test img")

        plt.subplot(122)
        plt.imshow(img_cele)
        plt.title("Celeb Look-Alike={}".format(celeb_name))
        plt.show()


def main():

    # to save time and not recreate the descriptors skip if the folder and index already exist
    if not os.path.exists("index.npy") or not os.path.exists("faceDescriptors.npy"):
        print("building face descriptors")
        inrole_data(faceDetector, shapePredictor, faceRecognizer)

    # show posabale celebrity lookalikes
    lookalike(faceDetector, shapePredictor, faceRecognizer)


if __name__ == "__main__":
    main()